---
title: 'Replication R code for: Ballot paper format and vote spoiling at Polish local elections of 2014: An attempt at a causal analysis'
output:
  html_document:
    df_print: paged
Author: Michal Pierzgalski
version: 0.8
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 



```{r}

# Required packages

library(ggplot2)
library(Synth)
library(dplyr)
library(tidyr)
library(readr)

library(devtools)
library(pkgbuild)
install_github('xuyiqing/gsynth')
library(gsynth)
library(panelView)

options(scipen=999)
```

# ---

```{r}

# READ DATA (read.csv : invalid1998_2, invalid2002_2, ..., ncand_com_1998_2014) !!!

# 1998

invalid1998_2 <- read_csv(file.choose()) # Select invalid1998_2.csv (contain data on invalid votes in 1998)


# START

### 1998

invalid1998_3 <- invalid1998_2 %>% filter(szczebel == "RD")

# retrive 'szczebel' (GM, GP, GW) in 2010
level_2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select(TERYT, level_2010 = szczebel) # ncand_com_1998_2014

invalid1998_4 <- left_join( invalid1998_3, level_2010 ) # combine data and commune levels of 2010


add_to_invalid1998 <- ncand_com_1998_2014 %>% filter(elec == 1998) %>% select( TERYT, dist_type, ncand_total, TS, uncon_total ) # 1998

invalid1998_4 <- left_join(invalid1998_4, add_to_invalid1998, by = "TERYT")


invalid1998_4 <- mutate(invalid1998_4, kod_urzedu = as.numeric(kod_urzedu), region = as.numeric(region))


### 2002

invalid2002_3 <- invalid2002_2 %>% filter(szczebel == "RD")

level_2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select(TERYT, level_2010 = szczebel) # ncand_com_1998_2014

invalid2002_4 <- left_join( invalid2002_3, level_2010 )


add_to_invalid2002 <- ncand_com_1998_2014 %>% filter(elec == 2002) %>% select( TERYT, dist_type, ncand_total, TS, uncon_total ) # 2002

invalid2002_4 <- left_join(invalid2002_4, add_to_invalid2002, by = "TERYT")


invalid2002_4 <- mutate(invalid2002_4, kod_urzedu = as.numeric(kod_urzedu), region = as.numeric(region))


### 2006

invalid2006_3 <- invalid2006_2 %>% filter(szczebel == "RD")

level_2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select(TERYT, level_2010 = szczebel) # ncand_com_1998_2014

invalid2006_4 <- left_join( invalid2006_3, level_2010 )


add_to_invalid2006 <- ncand_com_1998_2014 %>% filter(elec == 2006) %>% select( TERYT, dist_type, ncand_total, TS, uncon_total ) # 2006

invalid2006_4 <- left_join(invalid2006_4, add_to_invalid2006, by = "TERYT")


invalid2006_4 <- mutate(invalid2006_4, kod_urzedu = as.numeric(kod_urzedu), region = as.numeric(region))


### 2010

invalid2010_3 <- invalid2010_2 %>% filter(szczebel == "RD")

level_2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select(TERYT, level_2010 = szczebel) # ncand_com_1998_2014

invalid2010_4 <- left_join( invalid2010_3, level_2010 )


add_to_invalid2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select( TERYT, dist_type, ncand_total, TS, uncon_total ) # 2010

invalid2010_4 <- left_join(invalid2010_4, add_to_invalid2010, by = "TERYT")


invalid2010_4 <- mutate(invalid2010_4, kod_urzedu = as.numeric(kod_urzedu), region = as.numeric(region))


### 2014

invalid2014_3 <- invalid2014_2 %>% filter(szczebel == "RD")

level_2010 <- ncand_com_1998_2014 %>% filter(elec == 2010) %>% select(TERYT, level_2010 = szczebel) # ncand_com_1998_2014

invalid2014_4 <- left_join( invalid2014_3, level_2010 )


add_to_invalid2014 <- ncand_com_1998_2014 %>% filter(elec == 2014) %>% select( TERYT, dist_type, ncand_total, TS, uncon_total ) # 2014

invalid2014_4 <- left_join(invalid2014_4, add_to_invalid2014, by = "TERYT") # dist_type_2014: nie ma dzielnic Warszawy oraz problem Zielonej Gory (WO)


invalid2014_4 <- mutate(invalid2014_4, kod_urzedu = as.numeric(kod_urzedu), region = as.numeric(region))

# invalid2014_4_grouped <- filter(invalid2014_4, szczebel == "RD" & (level_1 == "GM" | level_1 == "GP") ) %>% group_by(commune) %>% summarise( min = min(glosow_niewaznych / kart_waznych), max = max(glosow_niewaznych / kart_waznych), median = median(glosow_niewaznych / kart_waznych), mean = mean(glosow_niewaznych / kart_waznych), sd = sd(glosow_niewaznych / kart_waznych), total = sum(glosow_niewaznych) / sum(kart_waznych), region = region[1] )

# ---


# MERGE

invalid <- bind_rows(invalid1998_4, invalid2002_4, invalid2006_4, invalid2010_4, invalid2014_4)


## Plot

ggplot(data = filter(invalid, szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + geom_boxplot( aes( x = as.factor(region), y = glosow_niewaznych / kart_waznych, fill = level_2010 )  ) + facet_grid(. ~ elec)

ggplot(data = filter(invalid, szczebel == "WO" ) ) + geom_boxplot( aes( x = region, y = glosow_niewaznych / kart_waznych )  ) + facet_grid(. ~ elec)

```

# ----


```{r}

# SCM

# 2002-2014
# TERYT for 1998 is recoded (2010)

### Define treatment group (D1)

invalid_exp_new <- mutate(invalid, D1 = ifelse( (elec == 2014 & level_2010 == "GM"), 1, 0 ), invalid_dist = glosow_niewaznych / kart_waznych )

###

# OR

### PLACEBO

invalid_exp_new <- mutate(invalid, D1 = ifelse( (elec == 2010 & level_2010 == "GM"), 1, 0 ), invalid_dist = glosow_niewaznych / kart_waznych )


# OR

invalid_exp_new <- mutate(invalid, D1 = ifelse( (elec == 2014 & level_2010 == "GP"), 1, 0 ), invalid_dist = glosow_niewaznych / kart_waznych )

# OR

invalid_exp_new <- mutate(invalid, D1 = ifelse( (elec == 2014 & level_2010 == "GW"), 1, 0 ), invalid_dist = glosow_niewaznych / kart_waznych )

###

invalid_exp_new <- data.frame( ungroup(invalid_exp_new) )

invalid_total_grouped <- invalid_exp_new %>% group_by(elec, level_2010) %>% summarise( invalid_frac = sum(glosow_niewaznych) / sum(kart_waznych) )

# ----


# Group by elections and TERYT (id)
invalid_exp_group_new <- invalid_exp_new %>% group_by(elec, TERYT) %>% summarise( invalid_median = median( glosow_niewaznych / kart_waznych ), invalid_total = sum(glosow_niewaznych) / sum(kart_waznych), D1 = D1[1], szczebel = szczebel[1], commune = commune[1], region = region[1], ncand_total = ncand_total[1], TS = TS[1], level_2010 = level_2010[1]) # Grouping

invalid_exp_group_new <- data.frame( ungroup(invalid_exp_group_new) )
# ----

# invalid_exp_group_new <- filter(invalid_exp_group_new, TERYT != 300104 & TERYT != 143207 & TERYT != 61407, level_2010 == "GP" | level_2010 == "GM" | level_2010 == "GW" ) # or region %in% c(10, 14, 26, 30)


# Define controls
invalid_exp_group_new <- mutate( invalid_exp_group_new, olpr = ifelse( (level_2010 == "GM") | (level_2010 == "GP" & elec != 2014), 1, 0 ), elec = as.numeric(elec), region = as.numeric(region), ncand_per_seat = (ncand_total / TS) )


### Filter (TS >= 15)

invalid_exp_group_new <- filter(invalid_exp_group_new, level_2010 %in% c("GM", "GW", "GP"), TS >= 15, region %in% c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32)) # - 22 (?) or - 14 (?)

# ----


# Balance panel
invalid_exp_group_temp_new <- invalid_exp_group_new %>% group_by(TERYT) %>% summarise( n = n() ) # Grouping
invalid_exp_group_new <- data.frame( ungroup(invalid_exp_group_new) )
invalid_exp_group_new <- left_join( invalid_exp_group_new, invalid_exp_group_temp_new )
invalid_exp_group_new <- filter(invalid_exp_group_new, n == 5 ) # N pretreatment > 4
# ----

# Select vars
invalid_exp_group_new <- invalid_exp_group_new %>% select( 1:6, 7:8, 10:11, 12, 13, 14 )
# invalid_exp_group_new <- invalid_exp_group_new %>% select( 1:10 )

# Remove ids TERYTs (missing values for dependent var) : 300104, 61407, 143207 !!!
invalid_exp_group_new <- invalid_exp_group_new %>% filter( !( TERYT %in% c(300104, 61407, 143207) ) )



# Stratfied sampling from the population of regions (GM)
set.seed(11)
invalid_exp_group_sample1 <- invalid_exp_group_new %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping

set.seed(21)
invalid_exp_group_sample2 <- invalid_exp_group_new %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping
# ----


# Stratfied sampling from the population of regions (GW)
set.seed(11)
invalid_exp_group_sample3 <- invalid_exp_group_new %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping

set.seed(21)
invalid_exp_group_sample4 <- invalid_exp_group_new %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping
# ----


# Data

# For GM
invalid_exp_sample1 <- data.frame( ungroup(invalid_exp_group_sample1) )
# OR
invalid_exp_sample2 <- data.frame( ungroup(invalid_exp_group_sample2) )


# For GW
invalid_exp_sample3 <- data.frame( ungroup(invalid_exp_group_sample3) )
# OR
invalid_exp_sample4 <- data.frame( ungroup(invalid_exp_group_sample4) )


###

invalid_exp_population <- data.frame( ungroup(invalid_exp_group_new) ) # !!!


### Add socio-econ controls

invalid_exp_population_controls <- left_join(invalid_exp_population, controls_invalid_econ, by = c("TERYT", "elec") ) 
# ---

###

invalid_exp_population_gm_gw <- filter( invalid_exp_population, level_2010 != "GP" )
invalid_exp_population_gm_gp <- filter( invalid_exp_population, level_2010 != "GW" )

### TEMP

invalid_exp_population_gm_gp <- mutate(invalid_exp_population_gm_gp, TERYT = TERYT, elec = as.numeric(elec) )
invalid_exp_population_gm_gp <- data.frame( ungroup(invalid_exp_population_gm_gp) )

set.seed(100)
invalid_exp_population_gm_gp_1 <- invalid_exp_population_gm_gp %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.3*length(unique(TERYT))) )  ) # Grouping

invalid_exp_population_gm_gp_1 <- data.frame( ungroup(invalid_exp_population_gm_gp_1) )

#

invalid_exp_population_sample1 <- data.frame( ungroup(invalid_exp_group_new) )

set.seed(100)
invalid_exp_population_sample1 <- invalid_exp_population_sample1 %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping

# invalid_exp_population_sample1 <- invalid_exp_population_sample1 %>% select(1:6, 8:12)

invalid_exp_population_sample1 <- data.frame( ungroup(invalid_exp_population_sample1) )

###

# OR Placebo :
invalid_exp_placebo_gp <- data.frame( ungroup(invalid_exp_population_controls) )
invalid_exp_placebo_gw <- data.frame( ungroup(invalid_exp_population_controls) )
# ----


# Gsynth : estimate model

## Controls desc

# D1 = treatment dummy

#1 olpr = if olpr, 1, else, 0
#2 TS = # of seats in a commune legislature;
#3 ncand_per_seat = the number of candidates per seat in a district

# As for the following variables:

#4 wap <- prod_wiek = the working-age population;
#5 unempl <- bezrob_rej_12_div_prod_wiek = the unemployment rate in communes (# unemployed inhabitants divided by wap);
#6 inc_pc_commune <- doch_wlasne_os = own income of a commune (per capita);

# we have only data for 2006, 2010 and 2014 years at a commune level. For 1998 and also 2002 we assumed the same distribution of values as in 2006.


## Basic summary statistics

summary( filter(invalid_exp_population_controls, olpr == 0 & elec == 2014 )$invalid_total )
summary( filter(invalid_exp_population_controls, olpr == 1 & elec == 2014 )$invalid_total )

summary( filter(invalid_exp_population_controls, olpr == 0 & elec == 2014 )$invalid_median )
summary( filter(invalid_exp_population_controls, olpr == 1 & elec == 2014 )$invalid_median )

##


## MODELS (Gsynth, ver. 1.1.2)

out0 <-
  gsynth(
  invalid_total ~ D1,
  Y = invalid_total,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_population_controls, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  r = c(0, 2),
  EM = F,
  force = "two-way",
  estimator = "ife",
  inference = "parametric",
  CV = T,
  seed = 2000
  )
# ----

out1 <-
  gsynth(
  invalid_total ~ D1 + olpr + TS + prod_wiek + ncand_per_seat + bezrob_rej_12_div_prod_wiek + doch_wlasne_os,
  Y = invalid_total,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_population_controls, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = T,
  r = c(1, 2),
  inference = "parametric",
  CV = T,
  force = "two-way",
  estimator = "ife",
  seed = 2000
  )
# ----

out2 <-
  gsynth(
  invalid_median ~ D1,
  Y = invalid_median,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data =  invalid_exp_population_gm_gw, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = T,
  inference = "parametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )
# ----

out3 <-
  gsynth(
  invalid_median ~ D1 + olpr + ncand_per_seat,
  Y = invalid_median,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_population_controls, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = T,
  inference = "parametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )

#----


# Print

print(out0)
plot(out0, theme.bw = T)
plot(out0, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = T )


print(out1)
plot(out1, theme.bw = T)
plot(out1, type = "counterfactual", raw = "none", xlab = "Year (elections)", theme.bw = T )
plot(out1, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = T )

plot(out1, type = "loadings", theme.bw = T)
plot(out1, type = "factors", theme.bw = T)

plot(out1, type = "missing")

# Details of the model 'out1'

out1$wgt.implied
out1$factor
out1$Y.tr
out1$Y.ct
out1$Y.bar
out1$eff

##

treated_population  <- filter( invalid_exp_population, level_2010 == "GM" & elec == 2014 )
control_units <- filter( invalid_exp_population, level_2010 != "GM" & elec == 2014 )

length(out1$lambda.co[,1])


# print(out2)
# plot(out2)
# plot(out2, type = "counterfactual", raw = "band", xlab = "Year (elections)" )
# plot(out2, type = "loadings")
# plot(out2, type = "factors")


```

# ---


```{r}
# Diagnostics/Placebo

out1_diag_sample1 <-
  gsynth(
  invalid_median ~ D1 + ncand_per_seat + olpr,
  Y = invalid_median,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_sample1, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = T,
  inference = "parametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )
# ----

out1_diag_sample2 <-
  gsynth(
  invalid_median ~ D1 + ncand_per_seat + olpr,
  Y = invalid_median,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_sample2, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = T,
  inference = "parametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )
# ----

out1_placebo_all_gw <-
  gsynth(
  invalid_total ~ D1 + olpr + TS + prod_wiek + ncand_per_seat + bezrob_rej_12_div_prod_wiek + doch_wlasne_os,
  Y = invalid_total,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_placebo_gw, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = F,
  inference = "nonparametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )
# ----

out1_placebo_all_gp <-
  gsynth(
  invalid_total ~ D1 + olpr + TS + prod_wiek + ncand_per_seat + bezrob_rej_12_div_prod_wiek + doch_wlasne_os,
  Y = invalid_total,
  D = D1,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_placebo_gp, # Select data set
  index = c("TERYT", "elec"),
  se = TRUE,
  EM = F,
  inference = "nonparametric",
  r = c(0, 2),
  CV = T,
  force = "two-way",
  seed = 2000
  )
# ----


# For sample 1 (stratified sample, seed = 11) :

print(out1_diag_sample1)
plot(out1_diag_sample1, type = "counterfactual", raw = "band", xlab = "Year (elections)" )

# For sample 2 (stratified sample, seed = 21) :

print(out1_diag_sample2)
plot(out1_diag_sample2, type = "counterfactual", raw = "band", xlab = "Year (elections)" )



# Placebo ALL for GP as treated

print(out1_placebo_all_gp)
plot(out1_placebo_all_gp, theme.bw = T)
plot(out1_placebo_all_gp, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = T )

# ----

# Placebo ALL for GW as treated

print(out1_placebo_all_gw)
plot(out1_placebo_all_gw, theme.bw = T)
plot(out1_placebo_all_gw, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = T )

# ----



# Placebo for sample 1 if GW as treated (stratified sample, seed = 11) :

print(out1_placebo_sample3)
plot(out1_placebo_sample3, type = "counterfactual", raw = "band", xlab = "Year (elections)" )

# Placebo for sample 2 if GW as treated (stratified sample, seed = 21) :

print(out1_placebo_sample4)
plot(out1_placebo_sample4, type = "counterfactual", raw = "band", xlab = "Year (elections)" )
# ----


# IFE

out <- interFE(Y ~ D1 + ncand_per_seat + olpr + as.factor(region), data = invalid_exp_group_new, index=c("TERYT", "elec"),
               r = 2, force = "two-way", nboots = 50)
```

# ---

```{r}

# DID

invalid_exp <- mutate(invalid, D1 = ifelse( (elec.x == 2010 & region == "14"), 1, 0 ), D2 = ifelse( (region == "14"), 1, 0 ), Policy = ifelse( (elec.x == 2010), 1, 0 ), invalid_dist = glosow_niewaznych / kart_waznych )

did1 <- lm(invalid_dist ~ Policy + D2 + Policy * D2, data = filter(invalid_exp, elec.x != 2014 & elec.x != 2002, szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GP") ))
summary(did1)


invalid_exp_2_group_did <- mutate(invalid_exp_2_group, Policy = ifelse( (elec.x == 2010), 1, 0 ), D2 = ifelse( (region == "14"), 1, 0 ) )
ggplot(data = filter(invalid_exp_2_group, szczebel == "RD" ) ) + geom_boxplot( aes( x = region, y = invalid_total )  ) + facet_grid(. ~ elec.x)


did2 <- lm(invalid_total ~ Policy + D2 + Policy * D2, data = invalid_exp_2_group_did)
summary(did2)
```

# ---


```{r}

## Plot
library(cowplot)
library(viridis)

ggplot(data = filter(invalid, region %in% c(2, 4, 6, 8, 18, 20, 22, 24), szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + geom_boxplot( aes( x = as.factor(region), y = glosow_niewaznych / kart_waznych, fill = level_2010 )  ) + facet_grid(. ~ elec) + xlab("Region ID") + ylim(c(0, 0.25)) + ylab("Proportions of invalid votes")

ggplot(data = filter(invalid, region %in% c(10, 12, 14, 16, 26, 28, 30, 32), szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + geom_boxplot( aes( x = as.factor(region), y = glosow_niewaznych / kart_waznych, fill = level_2010 )  ) + facet_grid(. ~ elec) + xlab("Region ID") + ylim(c(0, 0.25)) + ylab("Proportions of invalid votes")


ggplot(data = filter(invalid, szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + geom_boxplot( aes( x = as.factor(level_2010), y = glosow_niewaznych / kart_waznych, fill = level_2010 )  ) + facet_grid(. ~ elec) + theme(legend.position="bottom") + labs(fill = "Type") + ylab("Proportions of invalid votes") + xlab("Types of communes/municipalities") + ylim(c(0, 0.25))

###

ggplot(data = filter(invalid_exp_population, szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + facet_wrap(. ~ elec) + geom_pointrange( stat = "summary", aes( x = as.factor(region), y = invalid_total, shape = level_2010, color = level_2010 ), fun.y = median, fun.ymin = function(z) { quantile(z,0.25) }, fun.ymax = function(z) { quantile(z,0.75)}, position = position_dodge(width = 0.9)) + scale_color_viridis(discrete = T, option = "E", end = 0.8) + theme_cowplot() + theme(legend.position="bottom") + labs(color = "Type", shape = "Type") + ylab("Fraction of invalid votes (median values for regions)") + xlab("Region ID")


## Invalid totals for OLPR

ggplot(data = filter(invalid_exp_population_controls, szczebel == "RD" & (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") & (olpr == 1) ) ) + facet_wrap(. ~ elec) + geom_pointrange( stat = "summary", aes( x = as.factor(region), y = invalid_total, shape = level_2010 ), fun.y = median, fun.ymin = function(z) { quantile(z,0.25) }, fun.ymax = function(z) { quantile(z,0.75)}, position = position_dodge(width = 0.9)) + theme_cowplot() + theme(legend.position="bottom") + labs(color = "Type", shape = "Type") + ylab("Fraction of invalid votes (median values for regions)") + xlab("Region ID") 

##

ggplot(data = filter(invalid_exp_sample1, (level_2010 == "GM" | level_2010 == "GW" | level_2010 == "GP") ) ) + geom_boxplot( aes( x = as.factor(region), y = invalid_median, fill = level_2010 )  ) + facet_grid(. ~ elec) + xlab("Region ID")

ggplot(data = filter(invalid, szczebel == "WO" ) ) + geom_boxplot( aes( x = region, y = glosow_niewaznych / kart_waznych )  ) + facet_grid(. ~ elec)


```

# ---


# Additional checks [optional]

```{r}
invalid_exp_population_gw_gp <- filter( invalid_exp_population, level_2010 != "GM" )

###
invalid_exp_population_gw_gp <- invalid_exp_population_gw_gp %>% mutate(D2 = ifelse( (elec == 2014 & level_2010 == "GP"), 1, 0 ))

# OR

invalid_exp_population_gp_treated <- invalid_exp_population %>% mutate(D2 = ifelse( (elec == 2014 & level_2010 == "GP"), 1, 0 ))
###

set.seed(100)
invalid_exp_population_gw_gp <- invalid_exp_population_gw_gp %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.3*length(unique(TERYT))) )  ) # Grouping

invalid_exp_population_gw_gp <- data.frame( ungroup(invalid_exp_population_gw_gp) )
str(invalid_exp_population_gw_gp)

panelView(invalid_median ~ D2, data = invalid_exp_population_gw_gp,  index = c("TERYT", "elec"), type = "raw", theme.bw = T, xlab = "Year (elections)", ylab = "Median fractions of invalid votes in constituencies (communes)")


system.time( out_gp <-
gsynth(
  invalid_median ~ D2,
  Y = invalid_median,
  D = D2,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_population_gw_gp, # Select data set
  index = c("TERYT", "elec"),
  se = T,
  inference = "nonparametric",
  r = c(0, 2),
  CV = T,
  EM = F,
  force = "two-way",
  estimator = "ife",
  seed = 2000
))


print(out_gp)
plot(out_gp)

plot(out_gp, type = "raw")
plot(out_gp, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = TRUE )

plot(out_gp, type = "loadings")


### ---

set.seed(100)
invalid_exp_population_gp_treated <- invalid_exp_population_gp_treated %>% group_by(level_2010, region) %>% filter( TERYT %in% sample(unique(TERYT), ceiling(0.2*length(unique(TERYT))) )  ) # Grouping

invalid_exp_population_gp_treated <- data.frame( ungroup(invalid_exp_population_gp_treated) )

system.time( out_gp <-
gsynth(
  invalid_median ~ D2,
  Y = invalid_median,
  D = D2,
  parallel = T,
  min.T0 = 4,
  na.rm = T,
  data = invalid_exp_population_gp_treated, # Select data set
  index = c("TERYT", "elec"),
  se = T,
  inference = "nonparametric",
  r = c(0, 2),
  CV = T,
  EM = T,
  force = "two-way",
  estimator = "ife",
  seed = 2000
))

print(out_gp)
plot(out_gp)

plot(out_gp, type = "raw")
plot(out_gp, type = "counterfactual", raw = "band", xlab = "Year (elections)", theme.bw = TRUE )

plot(out_gp, type = "loadings")

```